import bamboolib as bam
import numpy as np
import pandas as pd

# Load the cleaned salary dataset from its CSV export.
df = pd.read_csv('C:/Users/lenovo/Desktop/salary-data-cleaned.csv')

# Step: Drop columns
# Remove the six 'Unnamed: N' columns (N = 6..11) from the loaded frame.
df2 = df.drop(
    [
        'Unnamed: 6',
        'Unnamed: 7',
        'Unnamed: 8',
        'Unnamed: 9',
        'Unnamed: 10',
        'Unnamed: 11',
    ],
    axis='columns',
)

# Step: Drop rows where Education Level is one of: phD
# The match is exact (case-sensitive): only the literal spelling 'phD' is
# removed; rows spelled differently (e.g. 'PhD') are kept.
df3 = df2.loc[~df2['Education Level'].isin(['phD'])]
df3
import plotly.express as px

# Histogram built from Salary (x) and Education Level (y), coloured by Gender.
# Rows missing either plotted column are dropped before plotting.
fig = px.histogram(
    data_frame=df3.dropna(subset=['Education Level', 'Salary']),
    x='Salary',
    y='Education Level',
    color='Gender',
)
fig
# Box plot of Age for each Education Level; rows with a missing
# Education Level are dropped first. `px` (plotly.express) is already
# imported earlier in this script, so it is not re-imported here.
fig = px.box(
    data_frame=df3.dropna(subset=['Education Level']),
    x='Education Level',
    y='Age',
)
fig
df3
Age Gender Education Level Job Title \
0 21 Female High School Junior Sales Representative
1 21 Female High School Junior Sales Representative
2 21 Female High School Junior Sales Representative
3 21 Female High School Junior Sales Representative
4 21 Female High School Junior Sales Representative
... ... ... ... ...
6697 62 Male PhD Software Engineer Manager
6698 62 Male PhD Software Engineer Manager
6699 62 Male PhD Software Engineer Manager
6700 62 Male PhD Software Engineer Manager
6701 62 Male PhD Software Engineer Manager
Years of Experience Salary
0 0.0 25000.0
1 0.0 25000.0
2 0.0 25000.0
3 0.0 25000.0
4 0.0 25000.0
... ... ...
6697 19.0 200000.0
6698 20.0 200000.0
6699 19.0 200000.0
6700 20.0 200000.0
6701 19.0 200000.0
[6701 rows x 6 columns]